********************************************************************************
********************************************************************************
** Descriptive statistics
** Includes:
**		1) Create variables reflecting total population values instead of sample
**			values for both primary and secondary school students.
**		2) Present summary statistics for both the entire population of primary 
**			and secondary school students, and sample students.
**		3) Illustrate FTNA score differences between private and public school
**			students conditional on PSLE scores.
**		4) Illustrate distribution of subject-specific PSLE and FTNA scores.
**		5) Present regional distribution of both entire population of secondary
**			school students, and the sample students.
********************************************************************************
********************************************************************************

** Descriptive table in main text
* Column 1 (population means): stack the FTNA file and the PSLE file so that
* one -estpost summarize- call covers both sets of students.
use "$dataraw_path\data_ftna_publication.dta", clear
append using "$dataraw_path\data_psle_publication.dta"

* The population-level PSLE variables are strings in which "nan" marks a
* missing value. Blank those out first ...
replace gpa_psle_core_pop = "" if gpa_psle_core_pop=="nan"
replace gpa_psle_other_pop = "" if gpa_psle_other_pop=="nan"
replace private_psle_pop = "" if private_psle_pop=="nan"
* ... then apply listwise deletion: if any one of the three is missing for a
* row, all three are set to missing for that row.
replace gpa_psle_core_pop = "" if gpa_psle_other_pop=="" | private_psle_pop==""
replace gpa_psle_other_pop = "" if gpa_psle_core_pop=="" | private_psle_pop==""
replace private_psle_pop = "" if gpa_psle_other_pop=="" | gpa_psle_core_pop==""
destring gpa_psle_core_pop gpa_psle_other_pop private_psle_pop, replace

* Keep the secondary-school variables only for rows where FTNA GPA, school
* type and peer PSLE score are all observed. The filter is evaluated once,
* before any variable is modified, and rows failing it are set to missing in
* place. Using -replace- (rather than generating a copy and renaming it)
* keeps each variable's storage type and labels, so double-precision values
* are not silently rounded to float.
tempvar ftna_complete
gen byte `ftna_complete' = gpa_ftna_core!=. & private!=. & peers_score_core!=.
foreach var of varlist gpa_ftna_core private female cohort16 ///
cohort17 religion bible_school islam_school female_school male_school ///
mix_school peers_score_core peers_fail_share peers_as_share uncommon_name {
	replace `var' = . if !`ftna_complete'
}
drop `ftna_complete'

* Swap the sample-level PSLE variables for their population-level versions so
* that the variable names match those used in the other table columns.
drop gpa_psle_core gpa_psle_other private_psle
rename gpa_psle_core_pop gpa_psle_core
rename gpa_psle_other_pop gpa_psle_other
rename private_psle_pop private_psle

* Means over every row that carries either secondary-school information
* (peers_fail_share observed) or primary-school information (gpa_psle_core
* observed).
eststo pop_mean: estpost summarize gpa_ftna_core gpa_psle_core ///
gpa_psle_other private_psle private female uncommon_name ///
peers_score_core peers_fail_share peers_as_share cohort16 cohort17 religion ///
bible_school islam_school female_school male_school ///
if peers_fail_share!=. | gpa_psle_core!=., meanonly

* Columns 2-5: statistics for the estimation sample (sample==1), computed on
* the FTNA file alone.
use "$dataraw_path\data_ftna_publication.dta", clear

* Variables reported in the descriptive table (same list for every column).
local descvars gpa_ftna_core gpa_psle_core gpa_psle_other private_psle ///
	private female uncommon_name peers_score_core peers_fail_share ///
	peers_as_share cohort16 cohort17 religion bible_school islam_school ///
	female_school male_school

* Means for the full sample, then separately by school type.
eststo sample_mean: estpost summarize `descvars' if sample==1, meanonly

eststo private: estpost summarize `descvars' ///
	if sample==1 & private==1, meanonly

eststo public: estpost summarize `descvars' ///
	if sample==1 & private==0, meanonly

* Two-sample t tests with unequal variances. The grouping variable is the
* negated private indicator, so private schools (-1) sort before public
* schools (0) -- presumably so the reported difference reads as private
* minus public.
gen temp_private = private*(-1)
eststo diff: estpost ttest `descvars' ///
	if sample==1, by(temp_private) unequal
drop temp_private


* Output of descriptive statistics
* Columns 1-4 (pop_mean, sample_mean, private, public) print the mean;
* column 5 (diff) prints the t-test difference and its standard error.
local statcells "mean(pattern(1 1 1 1 0) fmt(3)) b(pattern(0 0 0 0 1) fmt(3)) se(pattern(0 0 0 0 1) fmt(3))"

* Row labels for the LaTeX table.
* (A label for school_size, "Secondary school size", is intentionally left
* out, as in the earlier version of this command.)
local rowlabels ///
	gpa_ftna_core "GPA FTNA" ///
	gpa_psle_core "GPA PSLE" ///
	gpa_psle_other "GPA PSLE other" ///
	private_psle "Private primary" ///
	private "Private secondary" ///
	peers_score_core "Peers PSLE" ///
	peers_fail_share "Peers failed (PSLE)" ///
	peers_as_share "Peers with A (PSLE)" ///
	female "Female" ///
	religion "Religious courses" ///
	uncommon_name "Uncommon name" ///
	bible_school "Bible course" ///
	islam_school "Islam course" ///
	cohort16 "Cohort 2016" ///
	cohort17 "Cohort 2017" ///
	female_school "Girls only (secondary)" ///
	male_school "Boys only (secondary)"

esttab pop_mean sample_mean private public diff ///
	using "$out_path\table2.tex", replace ///
	cells("`statcells'") ///
	varlabels(`rowlabels')


** Figure: FTNA score differences between private and public school students
* For each PSLE GPA level 0, 1/3, ..., 4 the loop compares mean FTNA GPA of
* private and public school students in the sample whose PSLE GPA lies within
* 0.1 of that level. Results are written to the first 13 observations of four
* plotting variables; all intermediate quantities are held in temporary
* scalars, so no helper variables are left behind in the dataset and the
* means and standard deviations are not rounded to float before use.
gen gpa_psle_level = .
gen gpa_ftna_diff = .
gen lower_bound = .
gen upper_bound = .

tempname level lo hi n_pub mean_pub sd_pub gap se_gap
forvalues i = 1/13 {
	* Centre of the bin and its +/-0.1 window. The window edges are rounded
	* to float precision so that bin membership is decided exactly as it
	* would be with float-stored cut-offs.
	scalar `level' = -1/3 + `i'/3
	scalar `lo' = float(-1/3 + `i'/3 -0.1)
	scalar `hi' = float(-1/3 + `i'/3 +0.1)

	* Public school students in the bin
	sum gpa_ftna_core if sample==1 & private==0 & `lo'<gpa_psle_core & ///
		`hi'>gpa_psle_core
	scalar `n_pub' = r(N)
	scalar `mean_pub' = r(mean)
	scalar `sd_pub' = r(sd)

	* Private school students in the bin
	sum gpa_ftna_core if sample==1 & private==1 & `lo'<gpa_psle_core & ///
		`hi'>gpa_psle_core

	* Private-minus-public gap and its unequal-variance standard error
	scalar `gap' = r(mean) - `mean_pub'
	scalar `se_gap' = sqrt(`sd_pub'^2/`n_pub' + r(sd)^2/r(N))

	replace gpa_psle_level = `level' in `i'
	replace gpa_ftna_diff = `gap' in `i'
	* The band is +/- 2 standard errors (labelled as a 95% interval below)
	replace lower_bound = `gap' - 2*`se_gap' in `i'
	replace upper_bound = `gap' + 2*`se_gap' in `i'
}

* Only observations 4-13 (PSLE GPA levels 1 to 4) are plotted.
twoway rarea lower_bound upper_bound gpa_psle_level in 4/13, color(gs8) || ///
connected gpa_ftna_diff gpa_psle_level in 4/13, lpattern(solid) ///
legend(order(2 1) label(1 "95% confidence interval") ///
label(2 "Mean difference")) ytitle("GPA FTNA gap (private - public)" " ") ///
xtitle(" " "GPA PSLE") graphregion(color(white))
graph export "$out_path\figure1.tif", replace


** Appendix: distribution of subject specific exam scores
* For every subject score variable, <var>_share holds, for sample students,
* the fraction of sample students in the same school type (private==0/1) who
* obtained the same score (0-4). Counts are taken directly from -summarize-
* and -count- results instead of dataset-wide -egen- helper variables, so
* nothing temporary has to be created or dropped afterwards.

* Number of sample students by school type (sum of the sample indicator)
forvalues j=0/1 {
	quietly summarize sample if private==`j', meanonly
	local sample_size`j' = r(sum)
}

foreach var of varlist kiswahili_psle_num english_psle_num math_psle_num ///
kiswahili_ftna_num english_ftna_num math_ftna_num {
	gen `var'_share = .
	forvalues i=0/4 {
		forvalues j=0/1 {
			* Sample students of school type j with score i in this subject
			quietly count if `var'==`i' & sample==1 & private==`j'
			* Share is only defined for sample students; it stays missing
			* for everyone else.
			replace `var'_share = r(N) / `sample_size`j'' ///
				if `var'==`i' & sample==1 & private==`j'
		}
	}
}

* One bar chart per subject and exam: score shares by grade, with public and
* private schools side by side. Each list entry below gives the variable
* prefix, the graph-name prefix and the subtitle text for one subject.
foreach spec in "kiswahili kisw Kiswahili" "english engl English" ///
"math math Math" {
	tokenize `spec'
	local subject `1'
	local stub `2'
	local heading `3'

	foreach exam in psle ftna {
		* PSLE graphs are titled "Primary", FTNA graphs "Secondary"
		if "`exam'"=="psle" {
			local stage "Primary"
		}
		else {
			local stage "Secondary"
		}

		graph bar `subject'_`exam'_num_share, ///
			over(private, relabel(1 "Public" 2 "Private") gap(5) ///
				label(angle(45))) ///
			over(`subject'_`exam'_num, ///
				relabel(1 "E" 2 "D" 3 "C" 4 "B" 5 "A") gap(35)) ///
			ytitle("Fraction") bar(1, fcolor(gs12)) ///
			subtitle("`heading' `stage'", color(black)) ///
			name(`stub'_`exam', replace) graphregion(color(white)) asyvar
	}
}

* Arrange the six panels in two columns (primary left, secondary right)
graph combine kisw_psle kisw_ftna engl_psle engl_ftna math_psle math_ftna, ///
	cols(2) graphregion(color(white))

graph export "$out_path\figurea1.tif", replace


** Appendix: regional distribution of secondary school students
* Build one indicator per region together with its table label. The position
* of a name in the list is the region_id value it is paired with (1-26).
local region_labels
local i = 0
foreach region_name in "Arusha" "Dar Es Salaam" "Dodoma" "Geita" "Iringa" ///
"Kagera" "Katavi" "Kigoma" "Kilimanjaro" "Lindi" "Manyara" "Mara" "Mbeya" ///
"Morogoro" "Mtwara" "Mwanza" "Njombe" "Pwani" "Rukwa" "Ruvuma" ///
"Shinyanga" "Simiyu" "Singida" "Songwe" "Tabora" "Tanga" {
	local ++i
	gen region_dummy`i' = (region_id==`i')
	local region_labels `"`region_labels' region_dummy`i' "`region_name'""'
}

local regvars region_dummy1-region_dummy26

* Regional shares: all students with complete secondary-school information,
* then the sample as a whole and split by school type.
eststo population: estpost ci `regvars' ///
	if gpa_ftna_core!=. & peers_score_core!=. & peers_fail_share!=. & private!=.
eststo total: estpost ci `regvars' ///
	if sample==1
eststo private: estpost ci `regvars' ///
	if sample==1 & private==1
eststo public: estpost ci `regvars' ///
	if sample==1 & private==0

esttab population total private public using ///
	"$out_path\tablea1.tex", ///
	replace cells(b(fmt(3))) varlabels(`region_labels')
